bitkeeper revision 1.1236.1.3 (421bc44363Gqj5L6SBcgLUYMYTmG8A)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 22 Feb 2005 23:46:11 +0000 (23:46 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 22 Feb 2005 23:46:11 +0000 (23:46 +0000)
More FPU cleanups. We emulate CLTS and direct mov to CR0 properly
now, so Linux execution should be correct now, I hope!
Signed-off-by: Keir Fraser <keir@xensource.com>
.rootkeys
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h [deleted file]
xen/arch/x86/traps.c

index 21bbf68a3737262bd8ed4983ac1a132b0cb9caec..996aac70b0cf9a61a9eb756a9aafffe82a63efa7 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h
 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
 41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/vga.h
-40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h
 41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.10-xen-sparse/include/asm-xen/balloon.h
 40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.10-xen-sparse/include/asm-xen/ctrl_if.h
 40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.10-xen-sparse/include/asm-xen/evtchn.h
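index 829ba1f5c23c967ac80c918d2470a401252fded6..54f014e6052783ba1a4bcce90edb1fde7ef2d2b3 100644 (file)
--- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c
+++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c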
@@ -910,7 +910,7 @@ asmlinkage void math_state_restore(struct pt_regs regs)
        if ((regs.xcs & 2) == 0)
                return;
 
-       clts();         /* Allow maths ops (or we recurse) */
+       /* NB. 'clts' is done for us by Xen during virtual trap. */
        if (!tsk->used_math)
                init_fpu(tsk);
        restore_fpu(tsk);
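index 78f760df032ffdbd4677597ae9df5868b0dff7f1..6e882483a6cc925e58a1e91eb6313ce8a3ec2127 100644 (file)
--- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h
+++ b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h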
@@ -106,8 +106,7 @@ static inline unsigned long _get_base(char * addr)
 /*
  * Clear and set 'TS' bit respectively
  */
-/* NB. 'clts' is done for us by Xen during virtual trap. */
-#define clts() ((void)0)
+#define clts() __asm__ __volatile__ ("clts")
 #define read_cr0() \
        BUG();
 #define write_cr0(x) \
diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h
deleted file mode 100644 (file)
index 79a45de..0000000
--- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h
+++ /dev/null
@@ -1,884 +0,0 @@
-/*
- * include/asm-i386/xor.h
- *
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * High-speed RAID5 checksumming functions utilizing MMX instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-#define LD(x,y)                "       movq   8*("#x")(%1), %%mm"#y"   ;\n"
-#define ST(x,y)                "       movq %%mm"#y",   8*("#x")(%1)   ;\n"
-#define XO1(x,y)       "       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
-#define XO2(x,y)       "       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
-#define XO3(x,y)       "       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
-#define XO4(x,y)       "       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
-
-#include <asm/i387.h>
-
-static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-       unsigned long lines = bytes >> 7;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-       LD(i,0)                                 \
-               LD(i+1,1)                       \
-                       LD(i+2,2)               \
-                               LD(i+3,3)       \
-       XO1(i,0)                                \
-       ST(i,0)                                 \
-               XO1(i+1,1)                      \
-               ST(i+1,1)                       \
-                       XO1(i+2,2)              \
-                       ST(i+2,2)               \
-                               XO1(i+3,3)      \
-                               ST(i+3,3)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-       BLOCK(0)
-       BLOCK(4)
-       BLOCK(8)
-       BLOCK(12)
-
-       "       addl $128, %1         ;\n"
-       "       addl $128, %2         ;\n"
-       "       decl %0               ;\n"
-       "       jnz 1b                ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3)
-{
-       unsigned long lines = bytes >> 7;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-       LD(i,0)                                 \
-               LD(i+1,1)                       \
-                       LD(i+2,2)               \
-                               LD(i+3,3)       \
-       XO1(i,0)                                \
-               XO1(i+1,1)                      \
-                       XO1(i+2,2)              \
-                               XO1(i+3,3)      \
-       XO2(i,0)                                \
-       ST(i,0)                                 \
-               XO2(i+1,1)                      \
-               ST(i+1,1)                       \
-                       XO2(i+2,2)              \
-                       ST(i+2,2)               \
-                               XO2(i+3,3)      \
-                               ST(i+3,3)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-       BLOCK(0)
-       BLOCK(4)
-       BLOCK(8)
-       BLOCK(12)
-
-       "       addl $128, %1         ;\n"
-       "       addl $128, %2         ;\n"
-       "       addl $128, %3         ;\n"
-       "       decl %0               ;\n"
-       "       jnz 1b                ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3, unsigned long *p4)
-{
-       unsigned long lines = bytes >> 7;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-       LD(i,0)                                 \
-               LD(i+1,1)                       \
-                       LD(i+2,2)               \
-                               LD(i+3,3)       \
-       XO1(i,0)                                \
-               XO1(i+1,1)                      \
-                       XO1(i+2,2)              \
-                               XO1(i+3,3)      \
-       XO2(i,0)                                \
-               XO2(i+1,1)                      \
-                       XO2(i+2,2)              \
-                               XO2(i+3,3)      \
-       XO3(i,0)                                \
-       ST(i,0)                                 \
-               XO3(i+1,1)                      \
-               ST(i+1,1)                       \
-                       XO3(i+2,2)              \
-                       ST(i+2,2)               \
-                               XO3(i+3,3)      \
-                               ST(i+3,3)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-       BLOCK(0)
-       BLOCK(4)
-       BLOCK(8)
-       BLOCK(12)
-
-       "       addl $128, %1         ;\n"
-       "       addl $128, %2         ;\n"
-       "       addl $128, %3         ;\n"
-       "       addl $128, %4         ;\n"
-       "       decl %0               ;\n"
-       "       jnz 1b                ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-
-static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-       unsigned long lines = bytes >> 7;
-
-       kernel_fpu_begin();
-
-       /* Make sure GCC forgets anything it knows about p4 or p5,
-          such that it won't pass to the asm volatile below a
-          register that is shared with any other variable.  That's
-          because we modify p4 and p5 there, but we can't mark them
-          as read/write, otherwise we'd overflow the 10-asm-operands
-          limit of GCC < 3.1.  */
-       __asm__ ("" : "+r" (p4), "+r" (p5));
-
-       __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-       LD(i,0)                                 \
-               LD(i+1,1)                       \
-                       LD(i+2,2)               \
-                               LD(i+3,3)       \
-       XO1(i,0)                                \
-               XO1(i+1,1)                      \
-                       XO1(i+2,2)              \
-                               XO1(i+3,3)      \
-       XO2(i,0)                                \
-               XO2(i+1,1)                      \
-                       XO2(i+2,2)              \
-                               XO2(i+3,3)      \
-       XO3(i,0)                                \
-               XO3(i+1,1)                      \
-                       XO3(i+2,2)              \
-                               XO3(i+3,3)      \
-       XO4(i,0)                                \
-       ST(i,0)                                 \
-               XO4(i+1,1)                      \
-               ST(i+1,1)                       \
-                       XO4(i+2,2)              \
-                       ST(i+2,2)               \
-                               XO4(i+3,3)      \
-                               ST(i+3,3)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-       BLOCK(0)
-       BLOCK(4)
-       BLOCK(8)
-       BLOCK(12)
-
-       "       addl $128, %1         ;\n"
-       "       addl $128, %2         ;\n"
-       "       addl $128, %3         ;\n"
-       "       addl $128, %4         ;\n"
-       "       addl $128, %5         ;\n"
-       "       decl %0               ;\n"
-       "       jnz 1b                ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       : "r" (p4), "r" (p5) 
-       : "memory");
-
-       /* p4 and p5 were modified, and now the variables are dead.
-          Clobber them just to be sure nobody does something stupid
-          like assuming they have some legal value.  */
-       __asm__ ("" : "=r" (p4), "=r" (p5));
-
-       kernel_fpu_end();
-}
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef BLOCK
-
-static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-       unsigned long lines = bytes >> 6;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-       " .align 32                  ;\n"
-       " 1:                         ;\n"
-       "       movq   (%1), %%mm0   ;\n"
-       "       movq  8(%1), %%mm1   ;\n"
-       "       pxor   (%2), %%mm0   ;\n"
-       "       movq 16(%1), %%mm2   ;\n"
-       "       movq %%mm0,   (%1)   ;\n"
-       "       pxor  8(%2), %%mm1   ;\n"
-       "       movq 24(%1), %%mm3   ;\n"
-       "       movq %%mm1,  8(%1)   ;\n"
-       "       pxor 16(%2), %%mm2   ;\n"
-       "       movq 32(%1), %%mm4   ;\n"
-       "       movq %%mm2, 16(%1)   ;\n"
-       "       pxor 24(%2), %%mm3   ;\n"
-       "       movq 40(%1), %%mm5   ;\n"
-       "       movq %%mm3, 24(%1)   ;\n"
-       "       pxor 32(%2), %%mm4   ;\n"
-       "       movq 48(%1), %%mm6   ;\n"
-       "       movq %%mm4, 32(%1)   ;\n"
-       "       pxor 40(%2), %%mm5   ;\n"
-       "       movq 56(%1), %%mm7   ;\n"
-       "       movq %%mm5, 40(%1)   ;\n"
-       "       pxor 48(%2), %%mm6   ;\n"
-       "       pxor 56(%2), %%mm7   ;\n"
-       "       movq %%mm6, 48(%1)   ;\n"
-       "       movq %%mm7, 56(%1)   ;\n"
-       
-       "       addl $64, %1         ;\n"
-       "       addl $64, %2         ;\n"
-       "       decl %0              ;\n"
-       "       jnz 1b               ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3)
-{
-       unsigned long lines = bytes >> 6;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-       " .align 32,0x90             ;\n"
-       " 1:                         ;\n"
-       "       movq   (%1), %%mm0   ;\n"
-       "       movq  8(%1), %%mm1   ;\n"
-       "       pxor   (%2), %%mm0   ;\n"
-       "       movq 16(%1), %%mm2   ;\n"
-       "       pxor  8(%2), %%mm1   ;\n"
-       "       pxor   (%3), %%mm0   ;\n"
-       "       pxor 16(%2), %%mm2   ;\n"
-       "       movq %%mm0,   (%1)   ;\n"
-       "       pxor  8(%3), %%mm1   ;\n"
-       "       pxor 16(%3), %%mm2   ;\n"
-       "       movq 24(%1), %%mm3   ;\n"
-       "       movq %%mm1,  8(%1)   ;\n"
-       "       movq 32(%1), %%mm4   ;\n"
-       "       movq 40(%1), %%mm5   ;\n"
-       "       pxor 24(%2), %%mm3   ;\n"
-       "       movq %%mm2, 16(%1)   ;\n"
-       "       pxor 32(%2), %%mm4   ;\n"
-       "       pxor 24(%3), %%mm3   ;\n"
-       "       pxor 40(%2), %%mm5   ;\n"
-       "       movq %%mm3, 24(%1)   ;\n"
-       "       pxor 32(%3), %%mm4   ;\n"
-       "       pxor 40(%3), %%mm5   ;\n"
-       "       movq 48(%1), %%mm6   ;\n"
-       "       movq %%mm4, 32(%1)   ;\n"
-       "       movq 56(%1), %%mm7   ;\n"
-       "       pxor 48(%2), %%mm6   ;\n"
-       "       movq %%mm5, 40(%1)   ;\n"
-       "       pxor 56(%2), %%mm7   ;\n"
-       "       pxor 48(%3), %%mm6   ;\n"
-       "       pxor 56(%3), %%mm7   ;\n"
-       "       movq %%mm6, 48(%1)   ;\n"
-       "       movq %%mm7, 56(%1)   ;\n"
-      
-       "       addl $64, %1         ;\n"
-       "       addl $64, %2         ;\n"
-       "       addl $64, %3         ;\n"
-       "       decl %0              ;\n"
-       "       jnz 1b               ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       :
-       : "memory" );
-
-       kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3, unsigned long *p4)
-{
-       unsigned long lines = bytes >> 6;
-
-       kernel_fpu_begin();
-
-       __asm__ __volatile__ (
-       " .align 32,0x90             ;\n"
-       " 1:                         ;\n"
-       "       movq   (%1), %%mm0   ;\n"
-       "       movq  8(%1), %%mm1   ;\n"
-       "       pxor   (%2), %%mm0   ;\n"
-       "       movq 16(%1), %%mm2   ;\n"
-       "       pxor  8(%2), %%mm1   ;\n"
-       "       pxor   (%3), %%mm0   ;\n"
-       "       pxor 16(%2), %%mm2   ;\n"
-       "       pxor  8(%3), %%mm1   ;\n"
-       "       pxor   (%4), %%mm0   ;\n"
-       "       movq 24(%1), %%mm3   ;\n"
-       "       pxor 16(%3), %%mm2   ;\n"
-       "       pxor  8(%4), %%mm1   ;\n"
-       "       movq %%mm0,   (%1)   ;\n"
-       "       movq 32(%1), %%mm4   ;\n"
-       "       pxor 24(%2), %%mm3   ;\n"
-       "       pxor 16(%4), %%mm2   ;\n"
-       "       movq %%mm1,  8(%1)   ;\n"
-       "       movq 40(%1), %%mm5   ;\n"
-       "       pxor 32(%2), %%mm4   ;\n"
-       "       pxor 24(%3), %%mm3   ;\n"
-       "       movq %%mm2, 16(%1)   ;\n"
-       "       pxor 40(%2), %%mm5   ;\n"
-       "       pxor 32(%3), %%mm4   ;\n"
-       "       pxor 24(%4), %%mm3   ;\n"
-       "       movq %%mm3, 24(%1)   ;\n"
-       "       movq 56(%1), %%mm7   ;\n"
-       "       movq 48(%1), %%mm6   ;\n"
-       "       pxor 40(%3), %%mm5   ;\n"
-       "       pxor 32(%4), %%mm4   ;\n"
-       "       pxor 48(%2), %%mm6   ;\n"
-       "       movq %%mm4, 32(%1)   ;\n"
-       "       pxor 56(%2), %%mm7   ;\n"
-       "       pxor 40(%4), %%mm5   ;\n"
-       "       pxor 48(%3), %%mm6   ;\n"
-       "       pxor 56(%3), %%mm7   ;\n"
-       "       movq %%mm5, 40(%1)   ;\n"
-       "       pxor 48(%4), %%mm6   ;\n"
-       "       pxor 56(%4), %%mm7   ;\n"
-       "       movq %%mm6, 48(%1)   ;\n"
-       "       movq %%mm7, 56(%1)   ;\n"
-      
-       "       addl $64, %1         ;\n"
-       "       addl $64, %2         ;\n"
-       "       addl $64, %3         ;\n"
-       "       addl $64, %4         ;\n"
-       "       decl %0              ;\n"
-       "       jnz 1b               ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-       unsigned long lines = bytes >> 6;
-
-       kernel_fpu_begin();
-
-       /* Make sure GCC forgets anything it knows about p4 or p5,
-          such that it won't pass to the asm volatile below a
-          register that is shared with any other variable.  That's
-          because we modify p4 and p5 there, but we can't mark them
-          as read/write, otherwise we'd overflow the 10-asm-operands
-          limit of GCC < 3.1.  */
-       __asm__ ("" : "+r" (p4), "+r" (p5));
-
-       __asm__ __volatile__ (
-       " .align 32,0x90             ;\n"
-       " 1:                         ;\n"
-       "       movq   (%1), %%mm0   ;\n"
-       "       movq  8(%1), %%mm1   ;\n"
-       "       pxor   (%2), %%mm0   ;\n"
-       "       pxor  8(%2), %%mm1   ;\n"
-       "       movq 16(%1), %%mm2   ;\n"
-       "       pxor   (%3), %%mm0   ;\n"
-       "       pxor  8(%3), %%mm1   ;\n"
-       "       pxor 16(%2), %%mm2   ;\n"
-       "       pxor   (%4), %%mm0   ;\n"
-       "       pxor  8(%4), %%mm1   ;\n"
-       "       pxor 16(%3), %%mm2   ;\n"
-       "       movq 24(%1), %%mm3   ;\n"
-       "       pxor   (%5), %%mm0   ;\n"
-       "       pxor  8(%5), %%mm1   ;\n"
-       "       movq %%mm0,   (%1)   ;\n"
-       "       pxor 16(%4), %%mm2   ;\n"
-       "       pxor 24(%2), %%mm3   ;\n"
-       "       movq %%mm1,  8(%1)   ;\n"
-       "       pxor 16(%5), %%mm2   ;\n"
-       "       pxor 24(%3), %%mm3   ;\n"
-       "       movq 32(%1), %%mm4   ;\n"
-       "       movq %%mm2, 16(%1)   ;\n"
-       "       pxor 24(%4), %%mm3   ;\n"
-       "       pxor 32(%2), %%mm4   ;\n"
-       "       movq 40(%1), %%mm5   ;\n"
-       "       pxor 24(%5), %%mm3   ;\n"
-       "       pxor 32(%3), %%mm4   ;\n"
-       "       pxor 40(%2), %%mm5   ;\n"
-       "       movq %%mm3, 24(%1)   ;\n"
-       "       pxor 32(%4), %%mm4   ;\n"
-       "       pxor 40(%3), %%mm5   ;\n"
-       "       movq 48(%1), %%mm6   ;\n"
-       "       movq 56(%1), %%mm7   ;\n"
-       "       pxor 32(%5), %%mm4   ;\n"
-       "       pxor 40(%4), %%mm5   ;\n"
-       "       pxor 48(%2), %%mm6   ;\n"
-       "       pxor 56(%2), %%mm7   ;\n"
-       "       movq %%mm4, 32(%1)   ;\n"
-       "       pxor 48(%3), %%mm6   ;\n"
-       "       pxor 56(%3), %%mm7   ;\n"
-       "       pxor 40(%5), %%mm5   ;\n"
-       "       pxor 48(%4), %%mm6   ;\n"
-       "       pxor 56(%4), %%mm7   ;\n"
-       "       movq %%mm5, 40(%1)   ;\n"
-       "       pxor 48(%5), %%mm6   ;\n"
-       "       pxor 56(%5), %%mm7   ;\n"
-       "       movq %%mm6, 48(%1)   ;\n"
-       "       movq %%mm7, 56(%1)   ;\n"
-      
-       "       addl $64, %1         ;\n"
-       "       addl $64, %2         ;\n"
-       "       addl $64, %3         ;\n"
-       "       addl $64, %4         ;\n"
-       "       addl $64, %5         ;\n"
-       "       decl %0              ;\n"
-       "       jnz 1b               ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       : "r" (p4), "r" (p5)
-       : "memory");
-
-       /* p4 and p5 were modified, and now the variables are dead.
-          Clobber them just to be sure nobody does something stupid
-          like assuming they have some legal value.  */
-       __asm__ ("" : "=r" (p4), "=r" (p5));
-
-       kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_pII_mmx = {
-       .name = "pII_mmx",
-       .do_2 = xor_pII_mmx_2,
-       .do_3 = xor_pII_mmx_3,
-       .do_4 = xor_pII_mmx_4,
-       .do_5 = xor_pII_mmx_5,
-};
-
-static struct xor_block_template xor_block_p5_mmx = {
-       .name = "p5_mmx",
-       .do_2 = xor_p5_mmx_2,
-       .do_3 = xor_p5_mmx_3,
-       .do_4 = xor_p5_mmx_4,
-       .do_5 = xor_p5_mmx_5,
-};
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define XMMS_SAVE do {                         \
-       preempt_disable();                      \
-       if (!(current_thread_info()->status & TS_USEDFPU))      \
-               clts();                         \
-       __asm__ __volatile__ (                  \
-               "movups %%xmm0,(%1)     ;\n\t"  \
-               "movups %%xmm1,0x10(%1) ;\n\t"  \
-               "movups %%xmm2,0x20(%1) ;\n\t"  \
-               "movups %%xmm3,0x30(%1) ;\n\t"  \
-               : "=&r" (cr0)                   \
-               : "r" (xmm_save)                \
-               : "memory");                    \
-} while(0)
-
-#define XMMS_RESTORE do {                      \
-       __asm__ __volatile__ (                  \
-               "sfence                 ;\n\t"  \
-               "movups (%1),%%xmm0     ;\n\t"  \
-               "movups 0x10(%1),%%xmm1 ;\n\t"  \
-               "movups 0x20(%1),%%xmm2 ;\n\t"  \
-               "movups 0x30(%1),%%xmm3 ;\n\t"  \
-               :                               \
-               : "r" (cr0), "r" (xmm_save)     \
-               : "memory");                    \
-       if (!(current_thread_info()->status & TS_USEDFPU))      \
-               stts();                         \
-       preempt_enable();                       \
-} while(0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
-#define OFFS(x)                "16*("#x")"
-#define PF_OFFS(x)     "256+16*("#x")"
-#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%1)            ;\n"
-#define LD(x,y)                "       movaps   "OFFS(x)"(%1), %%xmm"#y"       ;\n"
-#define ST(x,y)                "       movaps %%xmm"#y",   "OFFS(x)"(%1)       ;\n"
-#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%2)            ;\n"
-#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%3)            ;\n"
-#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%4)            ;\n"
-#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%5)            ;\n"
-#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%6)            ;\n"
-#define XO1(x,y)       "       xorps   "OFFS(x)"(%2), %%xmm"#y"        ;\n"
-#define XO2(x,y)       "       xorps   "OFFS(x)"(%3), %%xmm"#y"        ;\n"
-#define XO3(x,y)       "       xorps   "OFFS(x)"(%4), %%xmm"#y"        ;\n"
-#define XO4(x,y)       "       xorps   "OFFS(x)"(%5), %%xmm"#y"        ;\n"
-#define XO5(x,y)       "       xorps   "OFFS(x)"(%6), %%xmm"#y"        ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-        unsigned long lines = bytes >> 8;
-       char xmm_save[16*4] ALIGN16;
-       int cr0;
-
-       XMMS_SAVE;
-
-        __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-               LD(i,0)                                 \
-                       LD(i+1,1)                       \
-               PF1(i)                                  \
-                               PF1(i+2)                \
-                               LD(i+2,2)               \
-                                       LD(i+3,3)       \
-               PF0(i+4)                                \
-                               PF0(i+6)                \
-               XO1(i,0)                                \
-                       XO1(i+1,1)                      \
-                               XO1(i+2,2)              \
-                                       XO1(i+3,3)      \
-               ST(i,0)                                 \
-                       ST(i+1,1)                       \
-                               ST(i+2,2)               \
-                                       ST(i+3,3)       \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-        " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-        "       addl $256, %1           ;\n"
-        "       addl $256, %2           ;\n"
-        "       decl %0                 ;\n"
-        "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2)
-       :
-        : "memory");
-
-       XMMS_RESTORE;
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3)
-{
-        unsigned long lines = bytes >> 8;
-       char xmm_save[16*4] ALIGN16;
-       int cr0;
-
-       XMMS_SAVE;
-
-        __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i+2)                \
-               LD(i,0)                                 \
-                       LD(i+1,1)                       \
-                               LD(i+2,2)               \
-                                       LD(i+3,3)       \
-               PF2(i)                                  \
-                               PF2(i+2)                \
-               PF0(i+4)                                \
-                               PF0(i+6)                \
-               XO1(i,0)                                \
-                       XO1(i+1,1)                      \
-                               XO1(i+2,2)              \
-                                       XO1(i+3,3)      \
-               XO2(i,0)                                \
-                       XO2(i+1,1)                      \
-                               XO2(i+2,2)              \
-                                       XO2(i+3,3)      \
-               ST(i,0)                                 \
-                       ST(i+1,1)                       \
-                               ST(i+2,2)               \
-                                       ST(i+3,3)       \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-        " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-        "       addl $256, %1           ;\n"
-        "       addl $256, %2           ;\n"
-        "       addl $256, %3           ;\n"
-        "       decl %0                 ;\n"
-        "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r"(p2), "+r"(p3)
-       :
-        : "memory" );
-
-       XMMS_RESTORE;
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4)
-{
-        unsigned long lines = bytes >> 8;
-       char xmm_save[16*4] ALIGN16;
-       int cr0;
-
-       XMMS_SAVE;
-
-        __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i+2)                \
-               LD(i,0)                                 \
-                       LD(i+1,1)                       \
-                               LD(i+2,2)               \
-                                       LD(i+3,3)       \
-               PF2(i)                                  \
-                               PF2(i+2)                \
-               XO1(i,0)                                \
-                       XO1(i+1,1)                      \
-                               XO1(i+2,2)              \
-                                       XO1(i+3,3)      \
-               PF3(i)                                  \
-                               PF3(i+2)                \
-               PF0(i+4)                                \
-                               PF0(i+6)                \
-               XO2(i,0)                                \
-                       XO2(i+1,1)                      \
-                               XO2(i+2,2)              \
-                                       XO2(i+3,3)      \
-               XO3(i,0)                                \
-                       XO3(i+1,1)                      \
-                               XO3(i+2,2)              \
-                                       XO3(i+3,3)      \
-               ST(i,0)                                 \
-                       ST(i+1,1)                       \
-                               ST(i+2,2)               \
-                                       ST(i+3,3)       \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-        " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-        "       addl $256, %1           ;\n"
-        "       addl $256, %2           ;\n"
-        "       addl $256, %3           ;\n"
-        "       addl $256, %4           ;\n"
-        "       decl %0                 ;\n"
-        "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
-       :
-        : "memory" );
-
-       XMMS_RESTORE;
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-        unsigned long lines = bytes >> 8;
-       char xmm_save[16*4] ALIGN16;
-       int cr0;
-
-       XMMS_SAVE;
-
-       /* Make sure GCC forgets anything it knows about p4 or p5,
-          such that it won't pass to the asm volatile below a
-          register that is shared with any other variable.  That's
-          because we modify p4 and p5 there, but we can't mark them
-          as read/write, otherwise we'd overflow the 10-asm-operands
-          limit of GCC < 3.1.  */
-       __asm__ ("" : "+r" (p4), "+r" (p5));
-
-        __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i+2)                \
-               LD(i,0)                                 \
-                       LD(i+1,1)                       \
-                               LD(i+2,2)               \
-                                       LD(i+3,3)       \
-               PF2(i)                                  \
-                               PF2(i+2)                \
-               XO1(i,0)                                \
-                       XO1(i+1,1)                      \
-                               XO1(i+2,2)              \
-                                       XO1(i+3,3)      \
-               PF3(i)                                  \
-                               PF3(i+2)                \
-               XO2(i,0)                                \
-                       XO2(i+1,1)                      \
-                               XO2(i+2,2)              \
-                                       XO2(i+3,3)      \
-               PF4(i)                                  \
-                               PF4(i+2)                \
-               PF0(i+4)                                \
-                               PF0(i+6)                \
-               XO3(i,0)                                \
-                       XO3(i+1,1)                      \
-                               XO3(i+2,2)              \
-                                       XO3(i+3,3)      \
-               XO4(i,0)                                \
-                       XO4(i+1,1)                      \
-                               XO4(i+2,2)              \
-                                       XO4(i+3,3)      \
-               ST(i,0)                                 \
-                       ST(i+1,1)                       \
-                               ST(i+2,2)               \
-                                       ST(i+3,3)       \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-        " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-        "       addl $256, %1           ;\n"
-        "       addl $256, %2           ;\n"
-        "       addl $256, %3           ;\n"
-        "       addl $256, %4           ;\n"
-        "       addl $256, %5           ;\n"
-        "       decl %0                 ;\n"
-        "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       : "r" (p4), "r" (p5)
-       : "memory");
-
-       /* p4 and p5 were modified, and now the variables are dead.
-          Clobber them just to be sure nobody does something stupid
-          like assuming they have some legal value.  */
-       __asm__ ("" : "=r" (p4), "=r" (p5));
-
-       XMMS_RESTORE;
-}
-
-static struct xor_block_template xor_block_pIII_sse = {
-        .name = "pIII_sse",
-        .do_2 =  xor_sse_2,
-        .do_3 =  xor_sse_3,
-        .do_4 =  xor_sse_4,
-        .do_5 = xor_sse_5,
-};
-
-/* Also try the generic routines.  */
-#include <asm-generic/xor.h>
-
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES                              \
-       do {                                            \
-               xor_speed(&xor_block_8regs);            \
-               xor_speed(&xor_block_8regs_p);          \
-               xor_speed(&xor_block_32regs);           \
-               xor_speed(&xor_block_32regs_p);         \
-               if (cpu_has_xmm)                        \
-                       xor_speed(&xor_block_pIII_sse); \
-               if (cpu_has_mmx) {                      \
-                       xor_speed(&xor_block_pII_mmx);  \
-                       xor_speed(&xor_block_p5_mmx);   \
-               }                                       \
-       } while (0)
-
-/* We force the use of the SSE xor block because it can write around L2.
-   We may also be able to load into the L1 only depending on how the cpu
-   deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-       (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
index 8a3aa56de8f361eda66b70f6aa9b9e37aee7306d..6106e09f2fb8ac1388ffe99315c7f62432d57433 100644 (file)
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -351,7 +351,6 @@ asmlinkage int do_page_fault(struct xen_regs *regs)
 
 static int emulate_privileged_op(struct xen_regs *regs)
 {
-    extern long do_fpu_taskswitch(void);
     extern void *decode_reg(struct xen_regs *regs, u8 b);
 
     struct exec_domain *ed = current;
@@ -423,7 +422,16 @@ static int emulate_privileged_op(struct xen_regs *regs)
         {
         case 0: /* Write CR0 */
             if ( *reg & X86_CR0_TS )
-                (void)do_fpu_taskswitch();
+            {
+                set_bit(EDF_GUEST_STTS, &ed->ed_flags);
+                stts();
+            }
+            else
+            {
+                clear_bit(EDF_GUEST_STTS, &ed->ed_flags);
+                if ( test_bit(EDF_USEDFPU, &ed->ed_flags) )
+                    clts();
+            }
             break;
 
         case 2: /* Write CR2 */